In [1]:
import numpy as np #linear algebra
import pandas as pd # data processing,CSV file I/O(e.g pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from warnings import filterwarnings 
filterwarnings("ignore")
In [2]:
# Load seaborn's "tips" example dataset into a DataFrame and display it.
data = sns.load_dataset(name="tips")
data
Out[2]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [3]:
# Preview the first five rows to check the column names and value formats.
data.head(n=5)
Out[3]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [4]:
# Dimensions of the frame as (rows, columns).
data.shape
Out[4]:
(244, 7)
In [5]:
# Flag every row that exactly repeats an earlier row, then count the flags.
duplicate_mask = data.duplicated()
duplicate_mask.sum()
Out[5]:
1
In [6]:
# Remove exact duplicate rows by rebinding `data` to the de-duplicated frame
# rather than mutating it in place. The original index labels are kept, so
# the index has a gap where the duplicate row was removed.
data = data.drop_duplicates()
In [7]:
# Sanity check: after de-duplication no repeated rows should remain.
duplicate_mask = data.duplicated()
duplicate_mask.sum()
Out[7]:
0
In [8]:
# Print the index range, per-column non-null counts, dtypes and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 243 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  243 non-null    float64 
 1   tip         243 non-null    float64 
 2   sex         243 non-null    category
 3   smoker      243 non-null    category
 4   day         243 non-null    category
 5   time        243 non-null    category
 6   size        243 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 9.1 KB
In [9]:
# List the column labels available for plotting and modelling.
data.columns
Out[9]:
Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')
In [10]:
# VISUALIZATION: exploratory plots of the tips dataset
In [11]:
# Bar chart of the mean total bill for each smoker category.
# Passing the raw columns to plt.bar would draw one bar per row, all
# overlapping at the same x positions, so aggregate to one value per
# category first.
mean_bill_by_smoker = data.groupby('smoker', observed=True)['total_bill'].mean()
plt.bar(mean_bill_by_smoker.index.astype(str), mean_bill_by_smoker.values)
plt.xlabel('smoker')
plt.ylabel('mean total_bill')
plt.title('Mean total bill by smoker status')
plt.xticks(rotation=90)
plt.show()
In [12]:
# Plotly bar chart with `tip` on the x axis and `day` on the y axis;
# the bars are coloured by `tip`.
fig = px.bar(data_frame=data, x='tip', y='day', color='tip')
fig.show()
In [13]:
# Compare how many rows fall in each meal time, split by sex.
# Both `time` and `sex` are categorical, so a violin plot has no numeric
# distribution to draw; grouped counts show the relationship instead.
fig = px.histogram(data, x='time', color='sex', barmode='group')
fig.show()
In [14]:
# Count of rows for each value of `size`, drawn on a 10x4-inch figure.
size_fig, size_ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=data, x='size', color='b', ax=size_ax)
plt.show()
In [15]:
# Line plot of `total_bill` against `size`.
sns.lineplot(data=data, x='size', y='total_bill')
Out[15]:
<AxesSubplot:xlabel='size', ylabel='total_bill'>
In [16]:
# Bar plot of `tip` for each value of `size`.
# x and y are passed by keyword together with `data`: seaborn accepts only
# `data` positionally, so positional x/y vectors fail on current releases.
sns.barplot(x='size', y='tip', data=data, color='r')
plt.xticks(rotation=90)
plt.show()
In [17]:
# Scatter of `total_bill` against `size`, labelling the axes on the
# Axes object that seaborn returns.
scatter_ax = sns.scatterplot(x='size', y='total_bill', data=data)
scatter_ax.set_xlabel('size')
scatter_ax.set_ylabel('total_bill')
plt.show()
In [18]:
# Distribution plot of the `sex` column (counts per category).
sns.displot(data=data["sex"])
Out[18]:
<seaborn.axisgrid.FacetGrid at 0x1fe497a1f40>
In [19]:
# Count of rows per meal time, with the tick labels rotated vertically.
sns.countplot(data=data, x='time')
plt.xticks(rotation=90)
Out[19]:
(array([0, 1]), [Text(0, 0, 'Lunch'), Text(1, 0, 'Dinner')])
In [20]:
# Box plot of the `tip` distribution for each `day`.
sns.boxplot(data=data, x='day', y='tip')
Out[20]:
<AxesSubplot:xlabel='day', ylabel='tip'>
In [21]:
# Violin plot of the `size` distribution for each `sex` category.
sns.violinplot(data=data, x='sex', y='size')
Out[21]:
<AxesSubplot:xlabel='sex', ylabel='size'>
In [22]:
# MODEL BUILDING: predict `sex` from the numeric columns
In [23]:
# Feature matrix: the three numeric columns of the dataset.
feature_names = ['total_bill', 'tip', 'size']
X = data[feature_names]
X.head()
Out[23]:
total_bill tip size
0 16.99 1.01 2
1 10.34 1.66 3
2 21.01 3.50 3
3 23.68 3.31 2
4 24.59 3.61 4
In [24]:
# Target vector: the categorical `sex` column.
y = data.loc[:, 'sex']
y.head()
Out[24]:
0    Female
1      Male
2      Male
3      Male
4    Female
Name: sex, dtype: category
Categories (2, object): ['Male', 'Female']
In [25]:
from sklearn.preprocessing import LabelEncoder

# Encode the categorical target as integer class codes. Fitting and
# transforming are written as two explicit steps; `le` keeps the fitted
# mapping so that encoded values can be mapped back to their labels.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)
In [26]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. random_state pins the shuffle so
# the split, and therefore the reported accuracy, is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
In [27]:
from sklearn.ensemble import AdaBoostClassifier

# Boosted ensemble of 50 estimators with a fixed seed.
abc = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1,
    random_state=0,
)

# Fit on the training split, then predict labels for the held-out rows.
model = abc.fit(X_train, y_train)
y_pred = model.predict(X_test)
In [28]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("AdaBoost Classifier Model Accuracy:", accuracy)
AdaBoost Classifier Model Accuracy: 0.684931506849315
In [ ]:
 
In [ ]:
 
In [ ]: